from method import Qtable
import gymnasium as gym
'''
Taxi-v3是gym库中一个离散案例
要求使用出租车将乘客送到指定地点
状态描述参数为一个数，表示位置状态，共500种状态
能进行的动作有6个，上下左右移动，乘客上车乘客下车
模型满足离散状态与离散动作空间
因此可以使用Qlearning来进行求解
'''
def train(max_iter):
    """Train the module-level ``QL`` agent on the ``Taxi-v3`` environment.

    Episodes are run until ``max_iter`` of them have ended with the
    environment reporting termination. Episodes that are abandoned
    (truncated by the environment, or whose return has dropped to -200
    or below) are NOT counted toward ``max_iter``.

    Before every episode the number of counted episodes and the running
    sum of their returns are printed. Once that running sum exceeds 20,
    training switches to an environment created with
    ``render_mode="human"`` and stays on it.

    Args:
        max_iter: Number of terminated episodes to run before stopping.
    """
    episodes_done = 0
    total_return = 0  # sum of returns over all terminated episodes
    rendering = False
    env = gym.make('Taxi-v3')
    try:
        while episodes_done < max_iter:
            state = env.reset()[0]
            episode_return = 0
            print(episodes_done, total_return)
            while True:
                action = QL.choose_action(state)
                new_state, reward, terminated, truncated, _ = env.step(action)
                QL.learn(state, action, reward, new_state, terminated)
                state = new_state
                episode_return += reward
                if terminated:
                    total_return += episode_return
                    if total_return > 20 and not rendering:
                        # Switch to the rendering environment exactly once,
                        # releasing the old one instead of leaking a new
                        # environment after every episode.
                        env.close()
                        env = gym.make('Taxi-v3', render_mode="human")
                        rendering = True
                    episodes_done += 1
                    break
                # Abandon the episode without counting it. Checking
                # `truncated` explicitly avoids stepping an environment
                # that has already signalled the episode was cut short.
                if truncated or episode_return <= -200:
                    break
    finally:
        # Always release the environment, even if training is interrupted.
        env.close()

# Build the shared agent read by train() and start training.
# NOTE(review): 6 and 500 match the action and state counts given in the
# description above; the meaning of the remaining three arguments is
# defined by method.Qtable - confirm there.
QL = Qtable(6, 500, 0.1, 0.01, 0.9)
train(10_000_000)
